importing packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggrepel)
library(geodata)
## Warning: package 'geodata' was built under R version 4.3.3
## Loading required package: terra
## Warning: package 'terra' was built under R version 4.3.3
## terra 1.8.15
##
## Attaching package: 'terra'
##
## The following object is masked from 'package:tidyr':
##
## extract
library(sf)
## Warning: package 'sf' was built under R version 4.3.3
## Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
library(geosphere)
## Warning: package 'geosphere' was built under R version 4.3.3
library(cartogram)
##
## Attaching package: 'cartogram'
##
## The following object is masked from 'package:terra':
##
## cartogram
library(readxl)
library(magick)
## Warning: package 'magick' was built under R version 4.3.3
## Linking to ImageMagick 6.9.12.93
## Enabled features: cairo, fontconfig, freetype, heic, lcms, pango, raw, rsvg, webp
## Disabled features: fftw, ghostscript, x11
library(viridis)
## Loading required package: viridisLite
library(ggtext)
loading in data
#listing female files files
file_paths <- list.files("new_data", pattern = "^f_\\d+-\\d+\\.csv$", full.names = TRUE)
#reading and cleaning each file
data_list <- lapply(file_paths, function(path) {
df <- read_csv2(path, locale = locale(encoding = "latin1"))
# Get x and y from filename
x_y <- str_match(basename(path), "f_(\\d+)-(\\d+)\\.csv")[,2:3]
colnames(df)[5:14] <- as.character(seq(as.integer(x_y[1]), as.integer(x_y[2])))
# Rename and clean 'orig' and 'dest' columns
df <- df %>%
mutate(
orig = sub("^Fra ", "", orig),
dest = sub("^Til ", "", dest)
)
return(df)
})
## ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
## Rows: 9800 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (3): Kvinder, dest, orig
## dbl (11): 2024, 0...5, 0...6, 0...7, 0...8, 0...9, 0...10, 0...11, 0...12, 0...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
##
## Rows: 9800 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (3): Kvinder, dest, orig
## dbl (11): 2024, 0...5, 0...6, 0...7, 0...8, 0...9, 0...10, 0...11, 0...12, 0...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
##
## Rows: 9800 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (3): Kvinder, dest, orig
## dbl (11): 2024, 0...5, 0...6, 0...7, 0...8, 0...9, 0...10, 0...11, 0...12, 0...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
##
## Rows: 9800 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (3): Kvinder, dest, orig
## dbl (11): 2024, 0...5, 0...6, 0...7, 0...8, 0...9, 0...10, 0...11, 0...12, 0...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
##
## Rows: 9800 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (3): Kvinder, dest, orig
## dbl (11): 2024, 0...5, 0...6, 0...7, 0...8, 0...9, 0...10, 0...11, 0...12, 0...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
##
## Rows: 9800 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (3): Kvinder, dest, orig
## dbl (11): 2024, 0...5, 0...6, 0...7, 0...8, 0...9, 0...10, 0...11, 0...12, 0...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
##
## Rows: 9800 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (3): Kvinder, dest, orig
## dbl (11): 2024, 0...5, 0...6, 0...7, 0...8, 0...9, 0...10, 0...11, 0...12, 0...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
##
## Rows: 9800 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (3): Kvinder, dest, orig
## dbl (11): 2024, 0...5, 0...6, 0...7, 0...8, 0...9, 0...10, 0...11, 0...12, 0...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
##
## Rows: 9800 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (3): Kvinder, dest, orig
## dbl (11): 2024, 0...5, 0...6, 0...7, 0...8, 0...9, 0...10, 0...11, 0...12, 0...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
##
## Rows: 9800 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (3): Kvinder, dest, orig
## dbl (11): 2024, 0...5, 0...6, 0...7, 0...8, 0...9, 0...10, 0...11, 0...12, 0...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#creating female df
combined_df_female <- data_list[[1]]
#binding only the numeric columns (last 10) from the rest
for (i in 2:length(data_list)) {
numeric_cols <- data_list[[i]] %>% select((ncol(.) - 9):ncol(.))
combined_df_female<- bind_cols(combined_df_female, numeric_cols)
}
### doing the same for the male data
# listing all male files
file_paths <- list.files("new_data", pattern = "^m_\\d+-\\d+\\.csv$", full.names = TRUE)
#reading and cleaning each file
data_list <- lapply(file_paths, function(path) {
df <- read_csv2(path, locale = locale(encoding = "latin1"))
#extracting age
x_y <- str_match(basename(path), "m_(\\d+)-(\\d+)\\.csv")[,2:3]
colnames(df)[5:14] <- as.character(seq(as.integer(x_y[1]), as.integer(x_y[2])))
#renaming and cleaning 'orig' and 'dest' columns
df <- df %>%
mutate(
orig = sub("^Fra ", "", orig),
dest = sub("^Til ", "", dest)
)
return(df)
})
## ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
## Rows: 9800 Columns: 14── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (3): Mænd, dest, orig
## dbl (11): 2024, 0...5, 0...6, 0...7, 0...8, 0...9, 0...10, 0...11, 0...12, 0...
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
## Rows: 9800 Columns: 14── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (3): Mænd, dest, orig
## dbl (11): 2024, 0...5, 0...6, 0...7, 0...8, 0...9, 0...10, 0...11, 0...12, 0...
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
## Rows: 9800 Columns: 14── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (3): Mænd, dest, orig
## dbl (11): 2024, 0...5, 0...6, 0...7, 0...8, 0...9, 0...10, 0...11, 0...12, 0...
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
## Rows: 9800 Columns: 14── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (3): Mænd, dest, orig
## dbl (11): 2024, 0...5, 0...6, 0...7, 0...8, 0...9, 0...10, 0...11, 0...12, 0...
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
## Rows: 9800 Columns: 14── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (3): Mænd, dest, orig
## dbl (11): 2024, 0...5, 0...6, 0...7, 0...8, 0...9, 0...10, 0...11, 0...12, 0...
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
## Rows: 9800 Columns: 14── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (3): Mænd, dest, orig
## dbl (11): 2024, 0...5, 0...6, 0...7, 0...8, 0...9, 0...10, 0...11, 0...12, 0...
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
## Rows: 9800 Columns: 14── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (3): Mænd, dest, orig
## dbl (11): 2024, 0...5, 0...6, 0...7, 0...8, 0...9, 0...10, 0...11, 0...12, 0...
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
## Rows: 9800 Columns: 14── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (3): Mænd, dest, orig
## dbl (11): 2024, 0...5, 0...6, 0...7, 0...8, 0...9, 0...10, 0...11, 0...12, 0...
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
## Rows: 9800 Columns: 14── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (3): Mænd, dest, orig
## dbl (11): 2024, 0...5, 0...6, 0...7, 0...8, 0...9, 0...10, 0...11, 0...12, 0...
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
## Rows: 9800 Columns: 14── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (3): Mænd, dest, orig
## dbl (11): 2024, 0...5, 0...6, 0...7, 0...8, 0...9, 0...10, 0...11, 0...12, 0...
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#saving male
combined_df_male <- data_list[[1]]
# Bind only the numeric columns (last 10) from the rest
for (i in 2:length(data_list)) {
numeric_cols <- data_list[[i]] %>% select((ncol(.) - 9):ncol(.))
combined_df_male <- bind_cols(combined_df_male, numeric_cols)
}
#creating gender columns
combined_df_male <- combined_df_male %>% mutate(sex = 1)
combined_df_female <- combined_df_female %>% mutate(sex = 0)
names(combined_df_female) <- names(combined_df_male) # ensure columns match
combined_df <- rbind(combined_df_male, combined_df_female)
#combining the two
df <- combined_df %>% select(-'Mænd' & -'2024')
#### note: this process was necessary as statistikbanken has a limit to how much data can be downloaded at once - therefore, male and female were downladed separately in 10 age groups each - these are then combined in this code chunk to create one singular dataframe.
cleaning data
# Normalize city names: convert "Århus" to "Aarhus"
df$orig <- recode(df$orig, "Århus" = "Aarhus")
df$dest <- recode(df$dest, "Århus" = "Aarhus")
# adding total column
df <- df %>%
mutate(total = rowSums(select(., matches("^\\d+$"))))
extract coordinates and attach muni name from NAME_2; calculating
centroids and extract coordinates safely
centroids_coords <- munic_sf %>%
st_centroid() %>%
mutate(
lon = st_coordinates(.)[, 1],
lat = st_coordinates(.)[, 2],
muni = NAME_2 # explicitly create a 'muni' column
) %>%
st_drop_geometry() # drop geometry so it's a plain tibble
## Warning: st_centroid assumes attributes are constant over geometries
centroids_coords$muni <- recode(centroids_coords$muni, "Århus" = "Aarhus")
calculating global maximum of movers (ved ik om jeg skal bruge)
# # Calculate the global maximum number of movers
# global_max_movers <- df %>%
# group_by(orig, dest) %>%
# summarise(movers = sum(total), .groups = "drop") %>%
# summarise(max_movers = max(movers)) %>%
# pull(max_movers)
plotting flow line maps
# ─── Define age groups ─────────────────────────────────────────────────────────
age_groups <- list(
"0-17" = 0:17,
"18-29" = 18:29,
"30-44" = 30:44,
"45-65" = 45:65,
"66-99" = 66:99
)
# ─── Define top cities ─────────────────────────────────────────────────────────
top5_cities <- c("København", "Aarhus", "Odense", "Aalborg")
# ─── Calculate global max movers for consistent alpha & linewidth scaling ──────
all_age_cols <- paste0(0:99)
df$total_movers <- rowSums(df[all_age_cols], na.rm = TRUE)
global_max_movers_log <- log1p(max(df$total_movers, na.rm = TRUE))
# ─── Loop over each age group ──────────────────────────────────────────────────
for (age_label in names(age_groups)) {
age_range <- age_groups[[age_label]]
age_cols <- paste0(age_range)
# 1. Aggregate by age group
df_age <- df %>%
mutate(total = rowSums(select(., all_of(age_cols)), na.rm = TRUE))
# 2. Loop over each city
for (city in top5_cities) {
flow_city <- df_age %>%
filter(dest == city, orig != dest) %>%
group_by(orig, dest) %>%
summarise(movers = sum(total), .groups = "drop") %>%
left_join(centroids_coords, by = c("orig" = "muni")) %>%
rename(lon_o = lon, lat_o = lat) %>%
left_join(centroids_coords, by = c("dest" = "muni")) %>%
rename(lon_d = lon, lat_d = lat) %>%
filter(!is.na(lon_o), !is.na(lon_d)) %>%
mutate(
movers_log = log1p(movers),
alpha_scaled = movers_log / global_max_movers_log
)
# 5. Select top origins for labeling
top_origins <- flow_city %>%
slice_max(order_by = movers, n = 10)
# 6. Plot
p <- ggplot() +
geom_sf(data = munic_sf, fill = "lightgreen", color = "white", size = 0.2) +
geom_segment(
data = flow_city,
aes(x = lon_o, y = lat_o, xend = lon_d, yend = lat_d,
linewidth = movers_log, alpha = alpha_scaled),
color = "steelblue"
) +
geom_text_repel(
data = top_origins,
aes(x = lon_o, y = lat_o, label = orig),
size = 3,
color = "black",
max.overlaps = Inf
) +
scale_linewidth_continuous(
range = c(0.3, 2.5), # Adjust thickness as needed
limits = c(0, global_max_movers_log),
name = "Log-scaled movers"
) +
scale_alpha_continuous(
name = "Relative flow (log scale)",
limits = c(0, 1)
) +
guides(
alpha = guide_legend(order = 1),
linewidth = guide_legend(order = 2)
) +
theme_minimal(base_size = 14) +
theme(
legend.title = element_text(size = 10),
legend.text = element_text(size = 8),
panel.background = element_rect(fill = "lightblue", color = NA),
plot.background = element_rect(fill = "lightblue", color = NA),
plot.subtitle = element_markdown(size = 12),
plot.title = element_markdown(size = 14)
) +
labs(
title = paste0("Migration Flows into <b>", city, "</b> 2024"),
subtitle = paste0(
"<span style='color:darkred;'><b>Age group: ", age_label,
"</b></span> | Arrows show width & opacity ∝ movers"
),
x = 'Longitude',
y = 'Latitude'
)
# Save the plot
filename <- paste0("out/migration_flows_", city, "_age_", gsub("-", "_", age_label), ".png")
ggsave(filename = filename, plot = p, width = 8, height = 6, dpi = 300)
print(p)
}
}



## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_segment()`).

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_segment()`).
















## note: a radius circle was initially created, but we elected not to include it in our final product - the code still remains but is crossed out for curious minds who want to see it.
creating gif
pacman::p_load('magick')
library(magick)
#defining parameters
age_labels <- names(age_groups)
output_dir <- "out"
gif_dir <- "gifs"
dir.create(gif_dir, showWarnings = FALSE)
for (city in top5_cities) {
# Generate file paths for this city's images
image_files <- paste0(output_dir, "/migration_flows_", city, "_age_", gsub("-", "_", age_labels), ".png")
# Read and combine images
images <- image_read(image_files)
animation <- image_animate(image_join(images), fps = 0.5 ) # Adjust fps as desired
# Save gif
gif_filename <- file.path(gif_dir, paste0("migration_flows_", city, ".gif"))
image_write(animation, gif_filename)
}
calculating and visualising mean kilometers traveled per relocation
by age and age groups
# Prepare a dataframe for storing results
age_distances_all <- data.frame()
#loop through each age (0 to 99)
for (age in 0:99) {
# Calculate total movers for this specific age
df_age <- df %>%
filter(!is.na(.[[as.character(age)]])) %>%
mutate(total = .[[as.character(age)]]) %>%
filter(total > 0) # Remove rows with zero movers
# Calculate distances for each movement
distances <- df_age %>%
left_join(centroids_coords, by = c("orig" = "muni")) %>%
rename(lon_o = lon, lat_o = lat) %>%
left_join(centroids_coords, by = c("dest" = "muni")) %>%
rename(lon_d = lon, lat_d = lat) %>%
filter(!is.na(lon_o), !is.na(lon_d)) %>%
mutate(distance_km = distHaversine(cbind(lon_o, lat_o), cbind(lon_d, lat_d)) / 1000) %>%
summarise(
mean_distance = mean(distance_km),
sd_distance = sd(distance_km),
n = n()
) %>%
mutate(
age = age,
se = sd_distance / sqrt(n), # Standard error
ci_lower = mean_distance - 1.96 * se,
ci_upper = mean_distance + 1.96 * se
)
# Append to the main dataframe
age_distances_all <- bind_rows(age_distances_all, distances)
}
age_distances_all <- age_distances_all %>%
mutate(age_group = case_when(
age <= 17 ~ "0–17",
age <= 29 ~ "18–29",
age <= 44 ~ "30–44",
age <= 65 ~ "45–65",
TRUE ~ "66–99"
))
# Create segment dataframe for colored line segments
segment_data <- age_distances_all %>%
arrange(age) %>%
mutate(
xend = lead(age),
yend = lead(mean_distance),
group_end = lead(age_group)
) %>%
filter(!is.na(xend))
# Plot
ggplot() +
# Confidence interval ribbon in grey
geom_ribbon(data = age_distances_all, aes(x = age, ymin = ci_lower, ymax = ci_upper),
fill = "grey80", alpha = 0.3) +
# Base grey line to maintain continuity
geom_line(data = age_distances_all, aes(x = age, y = mean_distance), color = "grey80", size = 1.2) +
# Colored segments by age group
geom_segment(data = segment_data, aes(x = age, y = mean_distance,
xend = xend, yend = yend, color = age_group), size = 1.5) +
# Points colored by age group
geom_point(data = age_distances_all, aes(x = age, y = mean_distance, color = age_group), size = 2) +
labs(
title = "Mean Kilometers Traveled per Age (All Municipalities)",
x = "Age",
y = "Mean Distance (km)",
color = "Age Group"
) +
theme_minimal(base_size = 14) +
theme(
panel.grid.major = element_line(color = "grey80"),
panel.grid.minor = element_blank()
) +
scale_x_continuous(breaks = seq(0, 99, 5)) +
scale_y_continuous(breaks = seq(20, 100, 10)) +
scale_color_manual(values = c(
"0–17" = "#E41A1C",
"18–29" = "#377EB8",
"30–44" = "#4DAF4A",
"45–65" = "#984EA3",
"66–99" = "#FF7F00"
))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Save the plot
ggsave("out/mean_distance_by_age_colored.png", width = 10, height = 6, dpi = 300)
calculating and visualizing total relocations per age and age
group
# Prepare a dataframe for storing results
age_relocations <- data.frame()
# Loop through each age (0 to 99)
for (age in 0:99) {
total_relocations <- df %>%
filter(!is.na(.[[as.character(age)]])) %>%
summarise(total_relocations = sum(.[[as.character(age)]], na.rm = TRUE)) %>%
mutate(age = age)
age_relocations <- bind_rows(age_relocations, total_relocations)
}
# Create age group column
age_relocations <- age_relocations %>%
mutate(age_group = case_when(
age <= 17 ~ "0–17",
age <= 29 ~ "18–29",
age <= 44 ~ "30–44",
age <= 65 ~ "45–65",
TRUE ~ "66–99"
))
# Create a segment dataframe for colored line segments
segment_data <- age_relocations %>%
arrange(age) %>%
mutate(
xend = lead(age),
yend = lead(total_relocations),
group_end = lead(age_group)
) %>%
filter(!is.na(xend)) # Remove last row which has NA in lead
# Plot continuous line in grey and overlay colored segments
ggplot() +
geom_line(data = age_relocations, aes(x = age, y = total_relocations), color = "grey80", size = 1) +
geom_segment(data = segment_data, aes(x = age, y = total_relocations,
xend = xend, yend = yend, color = age_group), size = 1.5) +
geom_point(data = age_relocations, aes(x = age, y = total_relocations, color = age_group), size = 2) +
scale_x_continuous(breaks = seq(0, 99, 5)) +
scale_y_continuous(breaks = seq(0, 20000, 4000)) +
labs(
title = "Total Relocations per Age (All Municipalities)",
x = "Age",
y = "Total Relocations",
color = "Age Group"
) +
theme_minimal(base_size = 14) +
theme(
panel.grid.major = element_line(color = "grey80"),
panel.grid.minor = element_blank()
) +
scale_color_manual(values = c(
"0–17" = "#E41A1C", # red
"18–29" = "#377EB8", # blue
"30–44" = "#4DAF4A", # green
"45–65" = "#984EA3", # purple
"66–99" = "#FF7F00" # orange
))

ggsave("out/total_relocations_by_age.png", width = 10, height = 6, dpi = 300)
population_df <- read_csv('new_data/age_cleaned.csv')
## New names:
## Rows: 100 Columns: 3
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," dbl
## (3): ...1, age, population
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Prepare a dataframe for storing total relocations per age
age_relocations <- data.frame()
# Loop through each age (0 to 99)
for (age in 0:99) {
total_relocations <- df %>%
filter(!is.na(.[[as.character(age)]])) %>%
summarise(total_relocations = sum(.[[as.character(age)]], na.rm = TRUE)) %>%
mutate(age = age)
age_relocations <- bind_rows(age_relocations, total_relocations)
}
# Merge with population data and compute relocation rate per 1000 people
age_relocations <- age_relocations %>%
left_join(population_df, by = "age") %>%
mutate(
relocation_rate = (total_relocations / population) * 1000,
age_group = case_when(
age <= 17 ~ "0–17",
age <= 29 ~ "18–29",
age <= 44 ~ "30–44",
age <= 65 ~ "45–65",
TRUE ~ "66–99"
)
)
# Create segment data for colored line segments
segment_data <- age_relocations %>%
arrange(age) %>%
mutate(
xend = lead(age),
yend = lead(relocation_rate),
group_end = lead(age_group)
) %>%
filter(!is.na(xend))
# Plot: relocation rate per 1000 people
ggplot() +
geom_line(data = age_relocations, aes(x = age, y = relocation_rate), color = "grey80", size = 1) +
geom_segment(data = segment_data, aes(x = age, y = relocation_rate,
xend = xend, yend = yend, color = age_group), size = 1.5) +
geom_point(data = age_relocations, aes(x = age, y = relocation_rate, color = age_group), size = 2) +
scale_x_continuous(breaks = seq(0, 99, 5)) +
labs(
title = "Relocation Rate per Age (per 1000 people)",
x = "Age",
y = "Relocations per 1000 People",
color = "Age Group"
) +
theme_minimal(base_size = 14) +
theme(
panel.grid.major = element_line(color = "grey80"),
panel.grid.minor = element_blank()
) +
scale_color_manual(values = c(
"0–17" = "#E41A1C", # red
"18–29" = "#377EB8", # blue
"30–44" = "#4DAF4A", # green
"45–65" = "#984EA3", # purple
"66–99" = "#FF7F00" # orange
))

ggsave("out/relocation_rate_by_age.png", width = 10, height = 6, dpi = 300)
### note: this plot may look very similar to the previous one but this one takes into the skewness of age distribution, i.e. there being fewer 99 year olds than 18 year olds.
creating cartograms
munic <- gadm(country = "DNK", level = 2, path = ".")
# Convert to sf
munic_sf <- st_as_sf(munic)
# Rename Århus to Aarhus in the NAME_2 column
munic_sf$NAME_2 <- gsub("^Århus$", "Aarhus", munic_sf$NAME_2)
munic_sf$NAME_2 <- gsub("^Høje Taastrup$", "Høje-Taastrup", munic_sf$NAME_2)
munic_sf$NAME_2 <- gsub("^Vesthimmerland$", "Vesthimmerlands", munic_sf$NAME_2)
# Transform to a Projected Coordinate System (ETRS89 / UTM zone 32N)
munic_sf <- st_transform(munic_sf, crs = 25832)
# ─── Define age groups ─────────────────────────────────────────────────────────
age_groups <- list(
"0-17" = 0:17,
"18-29" = 18:29,
"30-44" = 30:44,
"45-65" = 45:65,
"66-99" = 66:99
)
# Loop Over Age Groups
for (group_name in names(age_groups)) {
# Get the Age Range
age_range <- age_groups[[group_name]]
# Calculate Total Relocations per Municipality for the Current Age Group
relocations_per_muni <- df %>%
group_by(dest) %>%
summarise(total_relocations = sum(across(as.character(age_range)), na.rm = TRUE)) %>%
ungroup()
# Join with Spatial Data
muni_group_sf <- munic_sf %>%
left_join(relocations_per_muni, by = c("NAME_2" = "dest")) %>%
filter(!is.na(total_relocations))
# Create the Cartogram
muni_cartogram <- cartogram_cont(muni_group_sf, weight = "total_relocations", itermax = 10)
# Plot the Cartogram
p <- ggplot(muni_cartogram) +
geom_sf(aes(fill = total_relocations), color = "black", size = 0.2) +
scale_fill_viridis_c(option = "plasma", trans = "log", limits = c(1, 30000)) +
labs(
title = paste("Cartogram of Relocations (Age", group_name, ") per Municipality in Denmark"),
fill = "Total Relocations"
) +
theme_minimal(base_size = 14) +
theme(
legend.position = "right",
plot.title = element_text(size = 16, face = "bold")
)
# Save the Plot
ggsave(filename = paste0("out/relocations_wee__cartogram_", group_name, ".png"), plot = p, width = 10, height = 8, dpi = 300)
}
## Warning in scale_fill_viridis_c(option = "plasma", trans = "log", limits = c(1,
## : log-2.718282 transformation introduced infinite values.
pacman::p_load('magick')
# Define your age labels (in the same order as your cartogram PNGs)
age_labels <- c("0-17", "18-29", "30-44", "45-65", "66-99")
output_dir <- "out"
gif_dir <- "gifs"
dir.create(gif_dir, showWarnings = FALSE)
# Generate file paths for the images
image_files <- paste0(output_dir, "/relocations_wee__cartogram_", age_labels, ".png")
# Read and animate the images
if (all(file.exists(image_files))) {
images <- image_read(image_files)
animation <- image_animate(image_join(images), fps = 0.5) # 1 frame every 4 seconds
# Save the GIF
gif_filename <- file.path(gif_dir, "relocation_cartogram.gif")
image_write(animation, gif_filename)
message("GIF saved to: ", gif_filename)
} else {
warning("One or more image files are missing.")
}
## GIF saved to: gifs/relocation_cartogram.gif
creating popularity cartograms
library(readxl)
# Replace with your actual file path
population_file_path <- "new_data/FOLK1AM.xlsx"
# Extract the relevant data
munic_pop_df <- read_excel(population_file_path, skip = 2) %>%
select(municipality = `...1`,
population_jan_2025 = `2024M12`) %>%
mutate(municipality = trimws(municipality)) %>%
filter(!is.na(municipality), municipality != "Hele landet") %>%
filter(!municipality %in% c("Region Hovedstaden", "Region Sjælland",
"Region Syddanmark", "Region Midtjylland",
"Region Nordjylland"))
## New names:
## • `` -> `...1`
# View the cleaned population data
head(munic_pop_df)
## # A tibble: 6 × 2
## municipality population_jan_2025
## <chr> <dbl>
## 1 København 668906
## 2 Frederiksberg 106360
## 3 Dragør 14468
## 4 Tårnby 44009
## 5 Albertslund 28133
## 6 Ballerup 52744
# ─── Load Municipalities ───────────────────────────────────────────────
munic <- gadm(country = "DNK", level = 2, path = ".")
munic_sf <- st_as_sf(munic)
munic_sf$NAME_2 <- gsub("^Århus$", "Aarhus", munic_sf$NAME_2)
munic_sf$NAME_2 <- gsub("^Høje Taastrup$", "Høje-Taastrup", munic_sf$NAME_2)
munic_sf$NAME_2 <- gsub("^Vesthimmerland$", "Vesthimmerlands", munic_sf$NAME_2)
munic_sf <- st_transform(munic_sf, crs = 25832)
# ─── Load Population Data ──────────────────────────────────────────────
population_file_path <- "/Users/villiamjensen/Downloads/FOLK1AM.xlsx"
munic_pop_df <- read_excel(population_file_path, skip = 2) %>%
select(municipality = `...1`,
population_jan_2025 = `2024M12`) %>%
mutate(municipality = trimws(municipality)) %>%
filter(!is.na(municipality), municipality != "Hele landet") %>%
filter(!municipality %in% c("Region Hovedstaden", "Region Sjælland",
"Region Syddanmark", "Region Midtjylland",
"Region Nordjylland"))
## New names:
## • `` -> `...1`
# ─── Clean df$dest to match municipality names ─────────────────────────
df$dest <- gsub("^Århus$", "Aarhus", df$dest)
df$dest <- gsub("^Høje Taastrup$", "Høje-Taastrup", df$dest)
df$dest <- gsub("^Vesthimmerland$", "Vesthimmerlands", df$dest)
df$dest <- trimws(df$dest)
# ─── Define Age Groups ─────────────────────────────────────────────────
age_groups <- list(
"0-17" = 0:17,
"18-29" = 18:29,
"30-44" = 30:44,
"45-65" = 45:65,
"66-99" = 66:99
)
# ─── Precompute Popularity Indices ─────────────────────────────────────
popularity_all <- map_dfr(names(age_groups), function(group_name) {
age_range <- age_groups[[group_name]]
relocations_per_muni <- df %>%
group_by(dest) %>%
summarise(total_relocations = sum(across(as.character(age_range)), na.rm = TRUE)) %>%
ungroup()
relocations_per_muni %>%
left_join(munic_pop_df, by = c("dest" = "municipality")) %>%
mutate(
popularity_index = (total_relocations / population_jan_2025) * 1000,
age_group = group_name
) %>%
filter(!is.na(popularity_index))
})
# ─── Plot Cartograms ───────────────────────────────────────────────────
for (group_name in names(age_groups)) {
group_data <- popularity_all %>%
filter(age_group == group_name)
muni_group_sf <- munic_sf %>%
left_join(group_data, by = c("NAME_2" = "dest")) %>%
filter(!is.na(popularity_index))
muni_cartogram <- cartogram_cont(muni_group_sf, weight = "popularity_index", itermax = 10)
p <- ggplot(muni_cartogram) +
geom_sf(aes(fill = popularity_index), color = "black", size = 0.2) +
scale_fill_viridis_c(option = "plasma", trans = "log", limits = c(1, 65)) + ##the values were found by summary(popularity_all$popularity_index)
labs(
title = paste("Popularity Index (Age",group_name,"): Relocations per 1000 Residents"),
fill = "Relocations per 1000 Residents"
) +
theme_minimal(base_size = 14) +
theme(
legend.position = "right",
plot.title = element_text(size = 16, face = "bold")
)
ggsave(filename = paste0("out/popularity_cartogram_", group_name, ".png"),
plot = p, width = 10, height = 8, dpi = 300)
}
## Warning in scale_fill_viridis_c(option = "plasma", trans = "log", limits = c(1,
## : log-2.718282 transformation introduced infinite values.
# ─── Create Animated GIF ───────────────────────────────────────────────
pacman::p_load('magick')
age_labels <- names(age_groups)
output_dir <- "out"
gif_dir <- "gifs"
dir.create(gif_dir, showWarnings = FALSE)
image_files <- paste0(output_dir, "/popularity_cartogram_", age_labels, ".png")
if (all(file.exists(image_files))) {
images <- image_read(image_files)
animation <- image_animate(image_join(images), fps = 0.5)
gif_filename <- file.path(gif_dir, "popularity_cartogram.gif")
image_write(animation, gif_filename)
message("GIF saved to: ", gif_filename)
} else {
warning(" One or more image files are missing.")
}
## GIF saved to: gifs/popularity_cartogram.gif